# import all packages and set plots to be embedded inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import calendar
%matplotlib inline
# Silence all warnings so library messages do not clutter the rendered output.
# NOTE: the "ignore" action applies to every warning category, so deprecation
# notices from pandas/seaborn/matplotlib are hidden as well.
import warnings
warnings.simplefilter("ignore")
# Read the trip records from disk and discard every row that has a missing value.
gobike_df = pd.read_csv('201902-fordgobike-tripdata.csv').dropna()

# Parse both trip timestamps so the `.dt` accessor can be used on them.
for time_col in ('start_time', 'end_time'):
    gobike_df[time_col] = pd.to_datetime(gobike_df[time_col])

# Derive the start hour (0-23) and the abbreviated weekday name ('Mon', 'Tue', ...)
# from the trip start timestamp.
trip_start = gobike_df['start_time']
gobike_df['hour_of_day'] = trip_start.dt.hour.astype(int)
gobike_df['day_of_week'] = trip_start.dt.strftime('%a')

# Rider age is measured against the fixed reference year 2022.
# NOTE(review): the file name suggests the trips are from 2019 - confirm that
# 2022 (rather than the trip year) is the intended reference.
birth_year = gobike_df['member_birth_year'].astype(int)
gobike_df['member_age'] = 2022 - birth_year
# Histogram of trip duration on a logarithmic x axis.
# Bin edges are evenly spaced in log10 space (step 0.025), starting at 10**0 and
# extending just past the longest trip in the data.
log_binsize = 0.025
max_log_duration = np.log10(gobike_df['duration_sec'].max())
bin_edges = 10 ** np.arange(0, max_log_duration + log_binsize, log_binsize)

# Tick positions double as their own labels so the log axis shows plain seconds.
duration_ticks = [50, 200, 500, 1500, 3000, 6000]

plt.figure(figsize=[8, 6])
plt.hist(data=gobike_df, x='duration_sec', bins=bin_edges)
plt.xscale('log')
plt.xticks(duration_ticks, duration_ticks)
plt.xlim([50, 6000])
plt.xlabel('Duration (seconds)')
plt.title('Distribution of Trip Duration (seconds)', fontsize=20)
plt.show()
# Histogram of rider age using 3-year-wide bins that start at age 20 and extend
# just past the oldest rider; the visible x range is limited to 15-80 years.
binsize = 3
oldest_rider = gobike_df['member_age'].max()
bin_edges = np.arange(20, oldest_rider + binsize, binsize)

plt.figure(figsize=[8, 6])
plt.hist(data=gobike_df, x='member_age', bins=bin_edges)
plt.xlim([15, 80])
plt.xlabel('Age (years)')
plt.title('Age Distribution', fontsize=20)
plt.show()
# Remove outliers: a row is an outlier when the trip lasted longer than 6000
# seconds OR the rider is older than 80. Rows at or below both limits are kept.
outliers = (gobike_df['duration_sec'] > 6000) | (gobike_df['member_age'] > 80)

# Share of rows flagged as outliers, expressed in percent of the unfiltered data.
outlier_proportion = (outliers.sum() / gobike_df.shape[0]) * 100

# `~` is the element-wise logical NOT for a boolean mask; unary `-` is not a
# supported way to invert booleans in NumPy and only works via a pandas special case.
gobike_df = gobike_df[~outliers]
# Pie chart of the share of trips per user type, labelled in percent.
plt.figure(figsize=[8, 6])
sorted_counts = gobike_df['user_type'].value_counts()

# plt.pie requires one offset per wedge. Keep the largest group in place and pull
# every other wedge out by 0.1; for two user types this is the usual (0, 0.1),
# and it stays valid if the data contains a different number of categories.
explode = tuple(0 if position == 0 else 0.1 for position in range(len(sorted_counts)))

plt.pie(
    sorted_counts,
    explode=explode,
    labels=sorted_counts.index,
    autopct='%1.1f%%',
    shadow=True,
    startangle=90,
    counterclock=False,
)
plt.title('Subscriber vs. Customer (in %)', fontsize=14, fontweight='bold')
plt.show()
# Two stacked count plots in one figure: trips per start hour (top) and trips
# per weekday (bottom), both drawn in the first colour of the seaborn palette.
order = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
default_color = sns.color_palette()[0]

fig, ax = plt.subplots(nrows=2, figsize=[10, 8])
hour_axis, weekday_axis = ax

sns.countplot(data=gobike_df, x='hour_of_day', color=default_color, ax=hour_axis)
# Weekdays are forced into calendar order instead of order of appearance.
sns.countplot(data=gobike_df, x='day_of_week', color=default_color, ax=weekday_axis, order=order)

fig.suptitle('Trips count by hour and day', fontsize=20)
plt.show()
# Box plots (outlier points hidden) of rider age and trip duration, grouped by
# weekday and by user type.
# sns.boxplot draws onto the current Axes, so each plot is given its own figure
# and shown immediately; otherwise consecutive plots would be layered on top of
# each other and only the last title would survive.
# Each entry: (x column, y column, category order or None for default, title).
boxplot_specs = [
    ('day_of_week', 'member_age', order, 'Age of riders by days of the week'),
    ('day_of_week', 'duration_sec', order, 'Duration of trips taken by days of the week'),
    ('user_type', 'duration_sec', None, 'Duration of the trips taken by the type of users'),
    ('user_type', 'member_age', None, 'Age of the type of users'),
]
for x_col, y_col, category_order, plot_title in boxplot_specs:
    plt.figure()
    sns.boxplot(data=gobike_df, x=x_col, y=y_col, showfliers=False, order=category_order)
    plt.title(plot_title, fontsize=15)
    plt.show()
# Interactive map of trip start locations: one marker per row, coloured by
# rider gender, with the user type added to the hover tooltip.
map_options = {
    'lat': 'start_station_latitude',
    'lon': 'start_station_longitude',
    'color': 'member_gender',
    'hover_data': ['user_type'],
    'zoom': 4,
    'width': 800,
    'height': 600,
}
fig = px.scatter_mapbox(gobike_df, **map_options)

# Use the OpenStreetMap tiles as the base layer.
fig.update_layout(mapbox_style='open-street-map')
fig.show()
!jupyter nbconvert FORD_GOBIKE_EXPLANATORY_ANALYSIS.ipynb --to slides --post serve --no-input --no-prompt
[NbConvertApp] Converting notebook FORD_GOBIKE_EXPLANATORY_ANALYSIS.ipynb to slides
[NbConvertApp] Writing 11832619 bytes to FORD_GOBIKE_EXPLANATORY_ANALYSIS.slides.html
[NbConvertApp] Redirecting reveal.js requests to https://cdnjs.cloudflare.com/ajax/libs/reveal.js/3.5.0
Traceback (most recent call last):
File "C:\Users\PANDORA\anaconda3\Scripts\jupyter-nbconvert-script.py", line 10, in <module>
sys.exit(main())
File "C:\Users\PANDORA\anaconda3\lib\site-packages\jupyter_core\application.py", line 269, in launch_instance
return super().launch_instance(argv=argv, **kwargs)
File "C:\Users\PANDORA\anaconda3\lib\site-packages\traitlets\config\application.py", line 846, in launch_instance
app.start()
File "C:\Users\PANDORA\AppData\Roaming\Python\Python39\site-packages\nbconvert\nbconvertapp.py", line 414, in start
self.convert_notebooks()
File "C:\Users\PANDORA\AppData\Roaming\Python\Python39\site-packages\nbconvert\nbconvertapp.py", line 588, in convert_notebooks
self.convert_single_notebook(notebook_filename)
File "C:\Users\PANDORA\AppData\Roaming\Python\Python39\site-packages\nbconvert\nbconvertapp.py", line 555, in convert_single_notebook
self.postprocess_single_notebook(write_results)
File "C:\Users\PANDORA\AppData\Roaming\Python\Python39\site-packages\nbconvert\nbconvertapp.py", line 525, in postprocess_single_notebook
self.postprocessor(write_results)
File "C:\Users\PANDORA\AppData\Roaming\Python\Python39\site-packages\nbconvert\postprocessors\base.py", line 27, in __call__
self.postprocess(input)
File "C:\Users\PANDORA\AppData\Roaming\Python\Python39\site-packages\nbconvert\postprocessors\serve.py", line 91, in postprocess
http_server.listen(self.port, address=self.ip)
File "C:\Users\PANDORA\anaconda3\lib\site-packages\tornado\tcpserver.py", line 151, in listen
sockets = bind_sockets(port, address=address)
File "C:\Users\PANDORA\anaconda3\lib\site-packages\tornado\netutil.py", line 161, in bind_sockets
sock.bind(sockaddr)
OSError: [WinError 10048] Only one usage of each socket address (protocol/network address/port) is normally permitted